Analysis date: 2023-09-18

Depends on

CRC_Xenografts_FirstBatch_DataProcessing Script

# Restore the cached objects of the upstream data-processing step; load()
# recreates them under their saved names in the calling environment.
load(file = "../Data/Cache/Xenografts_Batch1_2_DataProcessing.RData")

TODO

Setup

Load libraries and functions

Quality control

Nr. phospho sites

# Report, for each filtered pY table, the number of peptides (rows) and the
# number of distinct proteins (unique HGNC symbols). sprintf() builds the same
# sentence that paste() produced with its default single-space separator.
print(sprintf(
  "%d pY peptides passed the filtering procedure for Set 1. These peptides were detected from %d proteins.",
  nrow(pY_Set1), length(unique(pY_Set1$HGNC_Symbol))
))
## [1] "254 pY peptides passed the filtering procedure for Set 1. These peptides were detected from 174 proteins."
# pST counterpart (disabled). The protein count reads from pST_Set1, the same
# table as the peptide count, instead of the mismatched object `pST`.
# print(sprintf(
#   "%d pST peptides passed the filtering procedure for Set 1. These peptides were detected from %d proteins.",
#   nrow(pST_Set1), length(unique(pST_Set1$HGNC_Symbol))
# ))

print(sprintf(
  "%d pY peptides passed the filtering procedure for Set 2. These peptides were detected from %d proteins.",
  nrow(pY_Set2), length(unique(pY_Set2$HGNC_Symbol))
))
## [1] "627 pY peptides passed the filtering procedure for Set 2. These peptides were detected from 389 proteins."
# pST counterpart (disabled). The protein count reads from pST_Set2, the same
# table as the peptide count, instead of the mismatched object `pST`.
# print(sprintf(
#   "%d pST peptides passed the filtering procedure for Set 2. These peptides were detected from %d proteins.",
#   nrow(pST_Set2), length(unique(pST_Set2$HGNC_Symbol))
# ))

print(sprintf(
  "%d pY peptides passed the filtering procedure for the sets combined. These peptides were detected from %d proteins.",
  nrow(pY_noNA), length(unique(pY_noNA$HGNC_Symbol))
))
## [1] "231 pY peptides passed the filtering procedure for the sets combined. These peptides were detected from 163 proteins."
# pST counterpart (disabled).
# print(sprintf(
#   "%d pST peptides passed the filtering procedure for the sets combined. These peptides were detected from %d proteins.",
#   nrow(pST_noNA), length(unique(pST_noNA$HGNC_Symbol))
# ))

Nr. proteins whole proteome

# Number of distinct proteins (unique HGNC symbols) per whole-proteome table.
print(sprintf(
  "%d proteins detected in Set 1.",
  length(unique(prot_Set1$HGNC_Symbol))
))
print(sprintf(
  "%d proteins detected in Set 2.",
  length(unique(prot_Set2$HGNC_Symbol))
))

print(sprintf(
  "%d proteins detected in both Sets.",
  length(unique(prot_top3peptidemedian$HGNC_Symbol))
))

Distributions

pY Abundances

Set 1

# pY Set 1 channel distributions. Each plot reshapes the selected columns to
# long form (one row per value, source column name in `Channel`) and overlays
# one semi-transparent density per channel.
# Disabled alternative to the density layer: geom_histogram(bins = 200).

# Raw abundances: columns containing "Abundance" but not "TMT"; linear x axis
# limited to 0-1e5.
pY_Set1 %>%
  select(contains("Abundance") & !contains("TMT")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_continuous(limits = c(0, 1e5)) +
  labs(title = "Raw abundances")
## Warning: Removed 374 rows containing non-finite values (`stat_density()`).

# TMT-normalised abundances, same linear axis limits.
pY_Set1 %>%
  select(contains("TMTNorm_Abundance")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_continuous(limits = c(0, 1e5)) +
  labs(title = "Abundances normalised to sup")
## Warning: Removed 358 rows containing non-finite values (`stat_density()`).

# Raw abundances on a log10 x axis.
pY_Set1 %>%
  select(contains("Abundance") & !contains("TMT")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_log10() +
  labs(title = "Raw abundances")

# TMT-normalised abundances on a log10 x axis.
pY_Set1 %>%
  select(contains("TMTNorm_Abundance")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_log10() +
  labs(title = "Abundances normalised to sup")

# log2 fold-change columns.
pY_Set1 %>%
  select(contains("log2FC")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  labs(title = "log2FC to bridge")

Set 2

# pY Set 2 channel distributions. Each plot reshapes the selected columns to
# long form (one row per value, source column name in `Channel`) and overlays
# one semi-transparent density per channel.
# Disabled alternative to the density layer: geom_histogram(bins = 200).

# Raw abundances: columns containing "Abundance" but not "TMT"; linear x axis
# limited to 0-1e5.
pY_Set2 %>%
  select(contains("Abundance") & !contains("TMT")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_continuous(limits = c(0, 1e5)) +
  labs(title = "Raw abundances")
## Warning: Removed 591 rows containing non-finite values (`stat_density()`).

# TMT-normalised abundances, same linear axis limits.
pY_Set2 %>%
  select(contains("TMTNorm_Abundance")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_continuous(limits = c(0, 1e5)) +
  labs(title = "Abundances normalised to sup")
## Warning: Removed 597 rows containing non-finite values (`stat_density()`).

# Raw abundances on a log10 x axis.
pY_Set2 %>%
  select(contains("Abundance") & !contains("TMT")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_log10() +
  labs(title = "Raw abundances")

# TMT-normalised abundances on a log10 x axis.
pY_Set2 %>%
  select(contains("TMTNorm_Abundance")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_log10() +
  labs(title = "Abundances normalised to sup")

# log2 fold-change columns.
pY_Set2 %>%
  select(contains("log2FC")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  labs(title = "log2FC to bridge")

Combined

Colored by treatment
# Combined pY table, coloured by treatment. Sample names are the log2FC column
# names with the "log2FC_" prefix removed; they are split on "_" into
# xenograft / treatment / day / replicate / set, keeping `Sample` itself.

# Density of log2FC values, one curve per sample.
pY_noNA %>%
  select(contains("log2FC")) %>%
  pivot_longer(names_to = "Sample", cols = everything()) %>%
  # No `drop` argument here: mutate() has no such option and would only add a
  # constant column named `drop`.
  mutate(Sample = str_remove(Sample, "log2FC_")) %>%
  separate(
    Sample,
    into = c("xenograft", "treatment", "day", "replicate", "set"),
    sep = "_",
    remove = FALSE
  ) %>%
  ggplot(aes(value, fill = treatment, group = Sample)) +
  geom_density(alpha = 0.5) +
  ggtitle("log2FC to normal") +
  theme_bw() +
  scale_fill_manual(values = PGPalette[c(1, 2, 4, 5)])

# Box plot of log2FC values per sample.
pY_noNA %>%
  select(contains("log2FC")) %>%
  pivot_longer(names_to = "Sample", cols = everything()) %>%
  mutate(Sample = str_remove(Sample, "log2FC_")) %>%
  separate(
    Sample,
    into = c("xenograft", "treatment", "day", "replicate", "set"),
    sep = "_",
    remove = FALSE
  ) %>%
  ggplot(aes(Sample, value, fill = treatment)) +
  geom_boxplot() +
  ggtitle("log2FC to normal") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_manual(values = PGPalette[c(1, 2, 4, 5)])

# Mean log2FC per sample, compared between treatments.
pY_noNA %>%
  select(contains("log2FC")) %>%
  pivot_longer(names_to = "Sample", cols = everything()) %>%
  mutate(Sample = str_remove(Sample, "log2FC_")) %>%
  separate(
    Sample,
    into = c("xenograft", "treatment", "day", "replicate", "set"),
    sep = "_",
    remove = FALSE
  ) %>%
  group_by(Sample, treatment, day, replicate, set) %>%
  # .groups = "drop" returns an ungrouped table and silences the regrouping
  # message; the plot does not depend on the grouping.
  summarise(mean_value = mean(value), .groups = "drop") %>%
  ggplot(aes(treatment, mean_value, fill = treatment)) +
  geom_boxplot() +
  ggbeeswarm::geom_beeswarm() +
  ggtitle("log2FC to normal") +
  ggpubr::stat_compare_means() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_manual(values = PGPalette[c(1, 2, 4, 5)])

Colored by day
# Combined pY table, coloured by day. Sample names are the log2FC column
# names with the "log2FC_" prefix removed; they are split on "_" into
# xenograft / treatment / day / replicate / set, keeping `Sample` itself.

# Density of log2FC values, one curve per sample.
pY_noNA %>%
  select(contains("log2FC")) %>%
  pivot_longer(names_to = "Sample", cols = everything()) %>%
  # No `drop` argument here: mutate() has no such option and would only add a
  # constant column named `drop`.
  mutate(Sample = str_remove(Sample, "log2FC_")) %>%
  separate(
    Sample,
    into = c("xenograft", "treatment", "day", "replicate", "set"),
    sep = "_",
    remove = FALSE
  ) %>%
  ggplot(aes(value, fill = day, group = Sample)) +
  geom_density(alpha = 0.5) +
  ggtitle("log2FC to normal") +
  theme_bw() +
  scale_fill_manual(values = PGPalette[c(1, 2, 4, 5)])

# Mean log2FC per sample, compared between days.
pY_noNA %>%
  select(contains("log2FC")) %>%
  pivot_longer(names_to = "Sample", cols = everything()) %>%
  mutate(Sample = str_remove(Sample, "log2FC_")) %>%
  separate(
    Sample,
    into = c("xenograft", "treatment", "day", "replicate", "set"),
    sep = "_",
    remove = FALSE
  ) %>%
  group_by(Sample, treatment, day, replicate, set) %>%
  # .groups = "drop" returns an ungrouped table and silences the regrouping
  # message; the plot does not depend on the grouping.
  summarise(mean_value = mean(value), .groups = "drop") %>%
  ggplot(aes(day, mean_value, fill = day)) +
  geom_boxplot() +
  ggbeeswarm::geom_beeswarm() +
  ggtitle("log2FC to normal") +
  ggpubr::stat_compare_means() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_manual(values = PGPalette[c(1, 2, 4, 5)])

# Same comparison, one panel per treatment.
# NOTE(review): the warning below shows stat_compare_means() failing in one
# panel; presumably that treatment has a single day level, leaving nothing to
# compare -- confirm against the data.
pY_noNA %>%
  select(contains("log2FC")) %>%
  pivot_longer(names_to = "Sample", cols = everything()) %>%
  mutate(Sample = str_remove(Sample, "log2FC_")) %>%
  separate(
    Sample,
    into = c("xenograft", "treatment", "day", "replicate", "set"),
    sep = "_",
    remove = FALSE
  ) %>%
  group_by(Sample, treatment, day, replicate, set) %>%
  summarise(mean_value = mean(value), .groups = "drop") %>%
  ggplot(aes(day, mean_value, fill = day)) +
  geom_boxplot() +
  ggbeeswarm::geom_beeswarm() +
  ggtitle("log2FC to normal") +
  ggpubr::stat_compare_means() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_manual(values = PGPalette[c(1, 2, 4, 5)]) +
  facet_grid(~treatment)
## Warning: Computation failed in `stat_compare_means()`
## Caused by error:
## ! argument "x" is missing, with no default

Colored by Prep
# Combined pY table, coloured by prep. Sample names are the log2FC column
# names with the "log2FC_" prefix removed; they are split on "_" into
# xenograft / treatment / day / replicate / set, keeping `Sample` itself.
# `prep` is looked up per row from `prep_l`, indexed by sample name.
# NOTE(review): presumably prep_l is a named list with one entry per sample;
# unlist() drops missing entries, so an unknown sample would make the lengths
# disagree -- confirm.

# Density of log2FC values, one curve per sample.
pY_noNA %>%
  select(contains("log2FC")) %>%
  pivot_longer(names_to = "Sample", cols = everything()) %>%
  # No `drop` argument here: mutate() has no such option and would only add a
  # constant column named `drop`.
  mutate(Sample = str_remove(Sample, "log2FC_")) %>%
  separate(
    Sample,
    into = c("xenograft", "treatment", "day", "replicate", "set"),
    sep = "_",
    remove = FALSE
  ) %>%
  mutate(prep = unlist(prep_l[Sample])) %>%
  ggplot(aes(value, fill = prep, group = Sample)) +
  geom_density(alpha = 0.5) +
  ggtitle("log2FC to normal") +
  theme_bw() +
  scale_fill_manual(values = PGPalette[c(1, 2, 4, 5)])

# Mean log2FC per sample, compared between preps.
pY_noNA %>%
  select(contains("log2FC")) %>%
  pivot_longer(names_to = "Sample", cols = everything()) %>%
  mutate(Sample = str_remove(Sample, "log2FC_")) %>%
  separate(
    Sample,
    into = c("xenograft", "treatment", "day", "replicate", "set"),
    sep = "_",
    remove = FALSE
  ) %>%
  mutate(prep = unlist(prep_l[Sample])) %>%
  group_by(Sample, treatment, day, replicate, set, prep) %>%
  # .groups = "drop" returns an ungrouped table and silences the regrouping
  # message; the plot does not depend on the grouping.
  summarise(mean_value = mean(value), .groups = "drop") %>%
  ggplot(aes(prep, mean_value, fill = prep)) +
  geom_boxplot() +
  ggbeeswarm::geom_beeswarm() +
  ggtitle("log2FC to normal") +
  ggpubr::stat_compare_means() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_manual(values = PGPalette[c(1, 2, 4, 5)])

# Same comparison, one panel per day (rows) and treatment (columns).
# NOTE(review): the warnings below show stat_compare_means() failing in six
# panels; presumably those panels contain a single prep level -- confirm.
pY_noNA %>%
  select(contains("log2FC")) %>%
  pivot_longer(names_to = "Sample", cols = everything()) %>%
  mutate(Sample = str_remove(Sample, "log2FC_")) %>%
  separate(
    Sample,
    into = c("xenograft", "treatment", "day", "replicate", "set"),
    sep = "_",
    remove = FALSE
  ) %>%
  mutate(prep = unlist(prep_l[Sample])) %>%
  group_by(Sample, treatment, day, replicate, set, prep) %>%
  summarise(mean_value = mean(value), .groups = "drop") %>%
  # Disabled option: restrict to controls with filter(treatment == "ctrl").
  ggplot(aes(prep, mean_value, fill = prep)) +
  geom_boxplot() +
  ggbeeswarm::geom_beeswarm() +
  ggtitle("log2FC to normal") +
  ggpubr::stat_compare_means() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_manual(values = PGPalette[c(1, 2, 4, 5)]) +
  facet_grid(day~treatment)
## Warning: Computation failed in `stat_compare_means()`
## Computation failed in `stat_compare_means()`
## Computation failed in `stat_compare_means()`
## Computation failed in `stat_compare_means()`
## Computation failed in `stat_compare_means()`
## Computation failed in `stat_compare_means()`
## Caused by error:
## ! argument "x" is missing, with no default

Colored by Set
# Combined pY table, coloured by set. Sample names are the log2FC column
# names with the "log2FC_" prefix removed; they are split on "_" into
# xenograft / treatment / day / replicate / set, keeping `Sample` itself.

# Density of log2FC values, one curve per sample.
pY_noNA %>%
  select(contains("log2FC")) %>%
  pivot_longer(names_to = "Sample", cols = everything()) %>%
  # No `drop` argument here: mutate() has no such option and would only add a
  # constant column named `drop`.
  mutate(Sample = str_remove(Sample, "log2FC_")) %>%
  separate(
    Sample,
    into = c("xenograft", "treatment", "day", "replicate", "set"),
    sep = "_",
    remove = FALSE
  ) %>%
  ggplot(aes(value, fill = set, group = Sample)) +
  geom_density(alpha = 0.5) +
  ggtitle("log2FC to normal") +
  theme_bw() +
  scale_fill_manual(values = PGPalette[c(1, 2, 4, 5)])

# Mean log2FC per sample, compared between sets.
pY_noNA %>%
  select(contains("log2FC")) %>%
  pivot_longer(names_to = "Sample", cols = everything()) %>%
  mutate(Sample = str_remove(Sample, "log2FC_")) %>%
  separate(
    Sample,
    into = c("xenograft", "treatment", "day", "replicate", "set"),
    sep = "_",
    remove = FALSE
  ) %>%
  group_by(Sample, treatment, day, replicate, set) %>%
  # .groups = "drop" returns an ungrouped table and silences the regrouping
  # message; the plot does not depend on the grouping.
  summarise(mean_value = mean(value), .groups = "drop") %>%
  ggplot(aes(set, mean_value, fill = set)) +
  geom_boxplot() +
  ggbeeswarm::geom_beeswarm() +
  ggtitle("log2FC to normal") +
  ggpubr::stat_compare_means() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_manual(values = PGPalette[c(1, 2, 4, 5)])

pST Abundances

Set 1

# pST Set 1 channel distributions. Each plot reshapes the selected columns to
# long form (one row per value, source column name in `Channel`) and overlays
# one semi-transparent density per channel.
# Disabled alternative to the density layer: geom_histogram(bins = 200).

# Raw abundances: columns containing "Abundance" but not "TMT"; linear x axis
# limited to 0-1e5.
pST_Set1 %>%
  select(contains("Abundance") & !contains("TMT")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_continuous(limits = c(0, 1e5)) +
  labs(title = "Raw abundances")

# TMT-normalised abundances, same linear axis limits.
pST_Set1 %>%
  select(contains("TMTNorm_Abundance")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_continuous(limits = c(0, 1e5)) +
  labs(title = "Abundances normalised to sup")

# Raw abundances on a log10 x axis.
pST_Set1 %>%
  select(contains("Abundance") & !contains("TMT")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_log10() +
  labs(title = "Raw abundances")

# TMT-normalised abundances on a log10 x axis.
pST_Set1 %>%
  select(contains("TMTNorm_Abundance")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_log10() +
  labs(title = "Abundances normalised to sup")

# log2 fold-change columns.
pST_Set1 %>%
  select(contains("log2FC")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  labs(title = "log2FC to bridge")

Set 2

# pST Set 2 channel distributions. Each plot reshapes the selected columns to
# long form (one row per value, source column name in `Channel`) and overlays
# one semi-transparent density per channel.
# Disabled alternative to the density layer: geom_histogram(bins = 200).

# Raw abundances: columns containing "Abundance" but not "TMT"; linear x axis
# limited to 0-1e5.
pST_Set2 %>%
  select(contains("Abundance") & !contains("TMT")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_continuous(limits = c(0, 1e5)) +
  labs(title = "Raw abundances")

# TMT-normalised abundances, same linear axis limits.
pST_Set2 %>%
  select(contains("TMTNorm_Abundance")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_continuous(limits = c(0, 1e5)) +
  labs(title = "Abundances normalised to sup")

# Raw abundances on a log10 x axis.
pST_Set2 %>%
  select(contains("Abundance") & !contains("TMT")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_log10() +
  labs(title = "Raw abundances")

# TMT-normalised abundances on a log10 x axis.
pST_Set2 %>%
  select(contains("TMTNorm_Abundance")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_log10() +
  labs(title = "Abundances normalised to sup")

# log2 fold-change columns.
pST_Set2 %>%
  select(contains("log2FC")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  labs(title = "log2FC to bridge")

Combined

# Combined pST table. Sample names are the log2FC column names with the
# "log2FC_" prefix removed; separate() splits them at its default separator
# (any run of non-alphanumeric characters) into treatment / replicate, keeping
# `Sample` itself.
# NOTE(review): the pY plots split sample names into five parts; confirm that
# two parts are right for the pST column names.

# Density of log2FC values, one curve per sample, with a reference line at 0.
pST_noNA %>%
  select(contains("log2FC")) %>%
  pivot_longer(names_to = "Sample", cols = everything()) %>%
  mutate(Sample = str_remove(Sample, "log2FC_")) %>%
  separate(Sample, into = c("treatment", "replicate"), remove = FALSE) %>%
  ggplot(aes(value, fill = treatment, group = Sample)) +
  geom_density(alpha = 0.5) +
  ggtitle("log2FC to normal") +
  theme_bw() +
  scale_fill_manual(values = PGPalette[c(1, 2, 4, 5)]) +
  geom_vline(xintercept = 0)

# Box plot of log2FC values per sample.
# NOTE(review): this title says "bridge" while the neighbouring plots of the
# same columns say "normal" -- confirm which reference is meant.
pST_noNA %>%
  select(contains("log2FC")) %>%
  pivot_longer(names_to = "Sample", cols = everything()) %>%
  mutate(Sample = str_remove(Sample, "log2FC_")) %>%
  separate(Sample, into = c("treatment", "replicate"), remove = FALSE) %>%
  ggplot(aes(Sample, value, fill = treatment)) +
  geom_boxplot() +
  ggtitle("log2FC to bridge") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_manual(values = PGPalette[c(1, 2, 4, 5)])

# Density again, leaving out columns whose name contains "normal" (hence only
# three palette colours).
pST_noNA %>%
  select(contains("log2FC")) %>%
  select(!contains("normal")) %>%
  pivot_longer(names_to = "Sample", cols = everything()) %>%
  mutate(Sample = str_remove(Sample, "log2FC_")) %>%
  separate(Sample, into = c("treatment", "replicate"), remove = FALSE) %>%
  ggplot(aes(value, fill = treatment, group = Sample)) +
  geom_density(alpha = 0.5) +
  ggtitle("log2FC to normal") +
  theme_bw() +
  scale_fill_manual(values = PGPalette[c(1, 2, 5)])

Whole proteome abundances

Set 1

# Whole-proteome Set 1 channel distributions. Each plot reshapes the selected
# columns to long form (one row per value, source column name in `Channel`)
# and overlays one semi-transparent density per channel.
# Disabled alternative to the density layer: geom_histogram(bins = 200).

# Raw abundances: columns containing "Abundance" but not "TMT"; linear x axis
# limited to 0-1e6.
prot_Set1 %>%
  select(contains("Abundance") & !contains("TMT")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_continuous(limits = c(0, 1e6)) +
  labs(title = "Raw abundances")

# TMT-normalised abundances; linear x axis limited to 0-10.
prot_Set1 %>%
  select(contains("TMTNorm_Abundance")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_continuous(limits = c(0, 10)) +
  labs(title = "Abundances normalised to sup")

# Raw abundances on a log10 x axis.
prot_Set1 %>%
  select(contains("Abundance") & !contains("TMT")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_log10() +
  labs(title = "Raw abundances")

# TMT-normalised abundances on a log10 x axis.
prot_Set1 %>%
  select(contains("TMTNorm_Abundance")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_log10() +
  labs(title = "Abundances normalised to sup")

# log2 fold-change columns.
prot_Set1 %>%
  select(contains("log2FC")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  labs(title = "log2FC to bridge")

Set 2

# Whole-proteome Set 2 channel distributions. Each plot reshapes the selected
# columns to long form (one row per value, source column name in `Channel`)
# and overlays one semi-transparent density per channel.
# Disabled alternative to the density layer: geom_histogram(bins = 200).

# Raw abundances: columns containing "Abundance" but not "TMT"; linear x axis
# limited to 0-1e6.
prot_Set2 %>%
  select(contains("Abundance") & !contains("TMT")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_continuous(limits = c(0, 1e6)) +
  labs(title = "Raw abundances")

# TMT-normalised abundances; linear x axis limited to 0-10.
prot_Set2 %>%
  select(contains("TMTNorm_Abundance")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_continuous(limits = c(0, 10)) +
  labs(title = "Abundances normalised to sup")

# Raw abundances on a log10 x axis.
prot_Set2 %>%
  select(contains("Abundance") & !contains("TMT")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_log10() +
  labs(title = "Raw abundances")

# TMT-normalised abundances on a log10 x axis.
prot_Set2 %>%
  select(contains("TMTNorm_Abundance")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_log10() +
  labs(title = "Abundances normalised to sup")

# log2 fold-change columns.
prot_Set2 %>%
  select(contains("log2FC")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  labs(title = "log2FC to bridge")

Combined

# Combined whole-proteome table: density of every log2FC column.
prot_top3peptidemedian %>%
  select(contains("log2FC")) %>%
  pivot_longer(names_to = "Channel", cols = everything()) %>%
  ggplot(aes(value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  ggtitle("log2FC to normal")

# Same densities without columns whose name contains "normal". Sample names
# (prefix "log2FC_" removed) are split at separate()'s default separator into
# treatment / replicate, keeping `Sample` itself.
prot_top3peptidemedian %>%
  select(contains("log2FC")) %>%
  select(!contains("normal")) %>%
  pivot_longer(names_to = "Sample", cols = everything()) %>%
  mutate(Sample = str_remove(Sample, "log2FC_")) %>%
  separate(Sample, into = c("treatment", "replicate"), remove = FALSE) %>%
  ggplot(aes(value, fill = treatment, group = Sample)) +
  geom_density(alpha = 0.5) +
  ggtitle("log2FC to normal") +
  theme_bw() +
  scale_fill_manual(values = PGPalette[c(1, 2, 5)])

# Mean pST log2FC per sample, compared between treatments with pairwise
# t-tests. The transposed matrix is reshaped to one row per peptide/sample
# pair before averaging over peptides.
# NOTE(review): the WT / G34R / K27M levels and the "-" separator differ from
# the sample naming used elsewhere in this report -- confirm this chunk
# belongs to this data set.
t(pST_mat_nonormal) %>%
  as.data.frame() %>%
  rownames_to_column("peptide") %>%
  pivot_longer(-peptide, names_to = "sample", values_to = "log2FC") %>%
  mutate(sample = gsub("log2FC_", "", sample)) %>%
  separate(
    sample,
    into = c("treatment", "replicate"),
    sep = "-",
    remove = FALSE
  ) %>%
  # The peptide label is not split into symbol and sequence here: summarise()
  # keeps only the grouping columns and the mean, so those columns were unused.
  group_by(sample, treatment, replicate) %>%
  summarise("Mean of patient" = mean(log2FC), .groups = "drop") %>%
  # factor() alone fixes the level order; a prior as.factor() adds nothing.
  mutate(treatment = factor(treatment, levels = c("WT", "G34R", "K27M"))) %>%
  ggplot(aes(treatment, `Mean of patient`, fill = treatment)) +
  # Outliers are hidden in the box layer because the beeswarm layer already
  # draws every sample.
  geom_boxplot(outlier.shape = NA) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90),
    axis.title.x = element_blank()
  ) +
  scale_fill_manual(values = PGPalette[c(5, 1, 2)]) +
  ggbeeswarm::geom_beeswarm() +
  ggpubr::stat_compare_means(
    method = "t.test",
    comparisons = list(
      c("WT", "G34R"),
      c("WT", "K27M"),
      c("K27M", "G34R")
    )
  ) +
  ggtitle("pST median normalised log2 fold change")

pY Mean-Variance Relationship

Set 1

# pY Set 1 mean-variance relationship. Each peptide is labelled
# "<HGNC symbol>_<annotated sequence>"; its raw abundance columns (containing
# "Abundance" but not "TMT") are reshaped to long form and summarised per
# peptide label.

# Standard deviation against mean on the raw scale, both axes limited to
# 0-1e5, with a correlation label and a linear fit.
pY_Set1 %>%
  mutate(peptide = paste0(HGNC_Symbol, "_", `Annotated Sequence`)) %>%
  select(peptide, contains("Abundance") & !contains("TMT")) %>%
  pivot_longer(cols = -peptide, names_to = "Channel") %>%
  group_by(peptide) %>%
  summarise(mean = mean(value), sd = sd(value)) %>%
  ggplot(mapping = aes(x = mean, y = sd)) +
  scale_x_continuous(limits = c(0, 1e5)) +
  scale_y_continuous(limits = c(0, 1e5)) +
  geom_point() +
  labs(title = "pY sd vs. mean") +
  ggpubr::stat_cor() +
  geom_smooth(method = "lm")
## Warning: Removed 20 rows containing non-finite values (`stat_cor()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 20 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 20 rows containing missing values (`geom_point()`).

# Same relationship after log2-transforming each value, without axis limits.
pY_Set1 %>%
  mutate(peptide = paste0(HGNC_Symbol, "_", `Annotated Sequence`)) %>%
  select(peptide, contains("Abundance") & !contains("TMT")) %>%
  pivot_longer(cols = -peptide, names_to = "Channel") %>%
  mutate(log2value = log2(value)) %>%
  group_by(peptide) %>%
  summarise(meanlog2 = mean(log2value), sdlog2 = sd(log2value)) %>%
  ggplot(mapping = aes(x = meanlog2, y = sdlog2)) +
  geom_point() +
  labs(title = "pY sd vs. mean") +
  ggpubr::stat_cor() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

Set 2

# pY Set 2 mean-variance relationship. Each peptide is labelled
# "<HGNC symbol>_<annotated sequence>"; its raw abundance columns (containing
# "Abundance" but not "TMT") are reshaped to long form and summarised per
# peptide label.

# Standard deviation against mean on the raw scale, both axes limited to
# 0-1e5, with a correlation label and a linear fit.
pY_Set2 %>%
  mutate(peptide = paste0(HGNC_Symbol, "_", `Annotated Sequence`)) %>%
  select(peptide, contains("Abundance") & !contains("TMT")) %>%
  pivot_longer(cols = -peptide, names_to = "Channel") %>%
  group_by(peptide) %>%
  summarise(mean = mean(value), sd = sd(value)) %>%
  ggplot(mapping = aes(x = mean, y = sd)) +
  scale_x_continuous(limits = c(0, 1e5)) +
  scale_y_continuous(limits = c(0, 1e5)) +
  geom_point() +
  labs(title = "pY sd vs. mean") +
  ggpubr::stat_cor() +
  geom_smooth(method = "lm")
## Warning: Removed 36 rows containing non-finite values (`stat_cor()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 36 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 36 rows containing missing values (`geom_point()`).

# Same relationship after log2-transforming each value, without axis limits.
pY_Set2 %>%
  mutate(peptide = paste0(HGNC_Symbol, "_", `Annotated Sequence`)) %>%
  select(peptide, contains("Abundance") & !contains("TMT")) %>%
  pivot_longer(cols = -peptide, names_to = "Channel") %>%
  mutate(log2value = log2(value)) %>%
  group_by(peptide) %>%
  summarise(meanlog2 = mean(log2value), sdlog2 = sd(log2value)) %>%
  ggplot(mapping = aes(x = meanlog2, y = sdlog2)) +
  geom_point() +
  labs(title = "pY sd vs. mean") +
  ggpubr::stat_cor() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

pY Abundances per peptide

Set 1

# Density of pY Set 1 raw abundances (columns containing "Abundance" but not
# "TMT"), one curve per channel, linear x axis limited to 0-1e5.
# NOTE(review): this repeats the first Set 1 distribution plot and does not
# break the data down per peptide -- confirm whether it is a placeholder.
pY_Set1 %>%
  select(contains("Abundance") & !contains("TMT")) %>%
  pivot_longer(cols = everything(), names_to = "Channel") %>%
  ggplot(mapping = aes(x = value, fill = Channel)) +
  geom_density(alpha = 0.5) +
  scale_x_continuous(limits = c(0, 1e5)) +
  labs(title = "Raw abundances")
## Warning: Removed 374 rows containing non-finite values (`stat_density()`).

Session Info

# Record the R version, platform and package versions used for this report.
utils::sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] lubridate_1.9.2             forcats_1.0.0              
##  [3] stringr_1.5.0               dplyr_1.1.2                
##  [5] purrr_1.0.2                 readr_2.1.4                
##  [7] tidyr_1.3.0                 tibble_3.2.1               
##  [9] ggplot2_3.4.2               tidyverse_2.0.0            
## [11] mdatools_0.14.0             SummarizedExperiment_1.28.0
## [13] GenomicRanges_1.50.2        GenomeInfoDb_1.34.9        
## [15] MatrixGenerics_1.10.0       matrixStats_1.0.0          
## [17] DEP_1.20.0                  org.Hs.eg.db_3.16.0        
## [19] AnnotationDbi_1.60.2        IRanges_2.32.0             
## [21] S4Vectors_0.36.2            Biobase_2.58.0             
## [23] BiocGenerics_0.44.0         fgsea_1.24.0               
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.4.1        circlize_0.4.15        fastmatch_1.1-3       
##   [4] plyr_1.8.8             igraph_1.5.1           gmm_1.8               
##   [7] splines_4.2.3          shinydashboard_0.7.2   BiocParallel_1.32.6   
##  [10] digest_0.6.33          foreach_1.5.2          htmltools_0.5.6       
##  [13] fansi_1.0.4            magrittr_2.0.3         memoise_2.0.1         
##  [16] cluster_2.1.4          doParallel_1.0.17      tzdb_0.4.0            
##  [19] limma_3.54.2           ComplexHeatmap_2.14.0  Biostrings_2.66.0     
##  [22] imputeLCMD_2.1         sandwich_3.0-2         timechange_0.2.0      
##  [25] colorspace_2.1-0       blob_1.2.4             xfun_0.40             
##  [28] crayon_1.5.2           RCurl_1.98-1.12        jsonlite_1.8.7        
##  [31] impute_1.72.3          zoo_1.8-12             iterators_1.0.14      
##  [34] glue_1.6.2             hash_2.2.6.2           gtable_0.3.3          
##  [37] zlibbioc_1.44.0        XVector_0.38.0         GetoptLong_1.0.5      
##  [40] DelayedArray_0.24.0    car_3.1-2              shape_1.4.6           
##  [43] abind_1.4-5            scales_1.2.1           vsn_3.66.0            
##  [46] mvtnorm_1.2-2          DBI_1.1.3              rstatix_0.7.2         
##  [49] Rcpp_1.0.11            plotrix_3.8-2          mzR_2.32.0            
##  [52] xtable_1.8-4           clue_0.3-64            bit_4.0.5             
##  [55] preprocessCore_1.60.2  sqldf_0.4-11           MsCoreUtils_1.10.0    
##  [58] DT_0.28                htmlwidgets_1.6.2      httr_1.4.6            
##  [61] gplots_3.1.3           RColorBrewer_1.1-3     ellipsis_0.3.2        
##  [64] farver_2.1.1           pkgconfig_2.0.3        XML_3.99-0.14         
##  [67] sass_0.4.7             utf8_1.2.3             STRINGdb_2.10.1       
##  [70] labeling_0.4.2         tidyselect_1.2.0       rlang_1.1.1           
##  [73] later_1.3.1            munsell_0.5.0          tools_4.2.3           
##  [76] cachem_1.0.8           cli_3.6.1              gsubfn_0.7            
##  [79] generics_0.1.3         RSQLite_2.3.1          broom_1.0.5           
##  [82] evaluate_0.21          fastmap_1.1.1          mzID_1.36.0           
##  [85] yaml_2.3.7             knitr_1.43             bit64_4.0.5           
##  [88] caTools_1.18.2         KEGGREST_1.38.0        ncdf4_1.21            
##  [91] nlme_3.1-163           mime_0.12              compiler_4.2.3        
##  [94] rstudioapi_0.15.0      beeswarm_0.4.0         png_0.1-8             
##  [97] ggsignif_0.6.4         affyio_1.68.0          stringi_1.7.12        
## [100] bslib_0.5.0            highr_0.10             MSnbase_2.24.2        
## [103] lattice_0.21-8         ProtGenerics_1.30.0    Matrix_1.6-0          
## [106] tmvtnorm_1.5           vctrs_0.6.3            pillar_1.9.0          
## [109] norm_1.0-11.1          lifecycle_1.0.3        BiocManager_1.30.22   
## [112] jquerylib_0.1.4        MALDIquant_1.22.1      GlobalOptions_0.1.2   
## [115] data.table_1.14.8      cowplot_1.1.1          bitops_1.0-7          
## [118] httpuv_1.6.11          R6_2.5.1               pcaMethods_1.90.0     
## [121] affy_1.76.0            promises_1.2.1         KernSmooth_2.23-22    
## [124] vipor_0.4.5            codetools_0.2-19       MASS_7.3-60           
## [127] gtools_3.9.4           assertthat_0.2.1       chron_2.3-61          
## [130] proto_1.0.0            rjson_0.2.21           withr_2.5.0           
## [133] GenomeInfoDbData_1.2.9 mgcv_1.9-0             parallel_4.2.3        
## [136] hms_1.1.3              grid_4.2.3             rmarkdown_2.23        
## [139] carData_3.0-5          ggpubr_0.6.0           shiny_1.7.4.1         
## [142] ggbeeswarm_0.7.2
# Ask knitr to stop knitting at this point.
# NOTE(review): presumably anything placed after this call is meant to be
# excluded from the rendered report -- confirm.
knitr::knit_exit()